# Fill colour shared by the plots in this analysis.
fillcolor <- "#D83434"

# Load the first 280,984 records of the newline-delimited review JSON file.
file_name <- 'yelp_academic_dataset_review.json'
# textConnection() hands back a connection that is already open, and
# stream_in() only closes connections it opened itself, so close this one
# explicitly (also when parsing fails) instead of leaking it.
review_con <- textConnection(readLines(file_name, n = 280984))
review <- tryCatch(
  jsonlite::stream_in(review_con, flatten = TRUE),
  finally = close(review_con)
)
summary(review)
##   review_id           user_id          business_id            stars      
##  Length:280984      Length:280984      Length:280984      Min.   :1.000  
##  Class :character   Class :character   Class :character   1st Qu.:3.000  
##  Mode  :character   Mode  :character   Mode  :character   Median :4.000  
##                                                           Mean   :3.838  
##                                                           3rd Qu.:5.000  
##                                                           Max.   :5.000  
##      useful            funny               cool             text          
##  Min.   :  0.000   Min.   :  0.0000   Min.   : 0.0000   Length:280984     
##  1st Qu.:  0.000   1st Qu.:  0.0000   1st Qu.: 0.0000   Class :character  
##  Median :  0.000   Median :  0.0000   Median : 0.0000   Mode  :character  
##  Mean   :  0.912   Mean   :  0.2606   Mean   : 0.3515                     
##  3rd Qu.:  1.000   3rd Qu.:  0.0000   3rd Qu.: 0.0000                     
##  Max.   :320.000   Max.   :284.0000   Max.   :79.0000                     
##      date          
##  Length:280984     
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
# Animated histogram of review star ratings, one animation state per rating.
ggplot(review, aes(x = stars)) +
  # `fill` is a fixed colour, so it is set outside aes(). Inside aes() the hex
  # string is treated as a one-level variable: the bars are drawn in ggplot's
  # default palette and a meaningless legend is added.
  geom_histogram(aes(color = stars), fill = fillcolor, binwidth = 1) +
  xlab("Rating") +
  ylab("Count") +
  ggtitle("Count of Yelp Review Ratings") +
  theme_classic() +
  transition_states(stars, transition_length = 3, state_length = 1) +
  shadow_mark(alpha = .3)

# No animation is passed, so anim_save() writes the most recent animation.
anim_save("ratinghistogram.gif")
# Load the business records from the newline-delimited JSON file (capped at
# 280,984 lines, the same cap used for the review file).
file_name <- 'yelp_academic_dataset_business.json'
# textConnection() hands back a connection that is already open, and
# stream_in() only closes connections it opened itself, so close this one
# explicitly (also when parsing fails) instead of leaking it.
business_con <- textConnection(readLines(file_name, n = 280984))
business <- tryCatch(
  jsonlite::stream_in(business_con, flatten = TRUE),
  finally = close(business_con)
)
summary(business)
##  business_id            name             address              city          
##  Length:150346      Length:150346      Length:150346      Length:150346     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##     state           postal_code           latitude       longitude      
##  Length:150346      Length:150346      Min.   :27.56   Min.   :-120.10  
##  Class :character   Class :character   1st Qu.:32.19   1st Qu.: -90.36  
##  Mode  :character   Mode  :character   Median :38.78   Median : -86.12  
##                                        Mean   :36.67   Mean   : -89.36  
##                                        3rd Qu.:39.95   3rd Qu.: -75.42  
##                                        Max.   :53.68   Max.   : -73.20  
##      stars        review_count        is_open        categories       
##  Min.   :1.000   Min.   :   5.00   Min.   :0.0000   Length:150346     
##  1st Qu.:3.000   1st Qu.:   8.00   1st Qu.:1.0000   Class :character  
##  Median :3.500   Median :  15.00   Median :1.0000   Mode  :character  
##  Mean   :3.597   Mean   :  44.87   Mean   :0.7962                     
##  3rd Qu.:4.500   3rd Qu.:  37.00   3rd Qu.:1.0000                     
##  Max.   :5.000   Max.   :7568.00   Max.   :1.0000                     
##  attributes.ByAppointmentOnly attributes.BusinessAcceptsCreditCards
##  Length:150346                Length:150346                        
##  Class :character             Class :character                     
##  Mode  :character             Mode  :character                     
##                                                                    
##                                                                    
##                                                                    
##  attributes.BikeParking attributes.RestaurantsPriceRange2 attributes.CoatCheck
##  Length:150346          Length:150346                     Length:150346       
##  Class :character       Class :character                  Class :character    
##  Mode  :character       Mode  :character                  Mode  :character    
##                                                                               
##                                                                               
##                                                                               
##  attributes.RestaurantsTakeOut attributes.RestaurantsDelivery
##  Length:150346                 Length:150346                 
##  Class :character              Class :character              
##  Mode  :character              Mode  :character              
##                                                              
##                                                              
##                                                              
##  attributes.Caters  attributes.WiFi    attributes.BusinessParking
##  Length:150346      Length:150346      Length:150346             
##  Class :character   Class :character   Class :character          
##  Mode  :character   Mode  :character   Mode  :character          
##                                                                  
##                                                                  
##                                                                  
##  attributes.WheelchairAccessible attributes.HappyHour attributes.OutdoorSeating
##  Length:150346                   Length:150346        Length:150346            
##  Class :character                Class :character     Class :character         
##  Mode  :character                Mode  :character     Mode  :character         
##                                                                                
##                                                                                
##                                                                                
##  attributes.HasTV   attributes.RestaurantsReservations attributes.DogsAllowed
##  Length:150346      Length:150346                      Length:150346         
##  Class :character   Class :character                   Class :character      
##  Mode  :character   Mode  :character                   Mode  :character      
##                                                                              
##                                                                              
##                                                                              
##  attributes.Alcohol attributes.GoodForKids attributes.RestaurantsAttire
##  Length:150346      Length:150346          Length:150346               
##  Class :character   Class :character       Class :character            
##  Mode  :character   Mode  :character       Mode  :character            
##                                                                        
##                                                                        
##                                                                        
##  attributes.Ambience attributes.RestaurantsTableService
##  Length:150346       Length:150346                     
##  Class :character    Class :character                  
##  Mode  :character    Mode  :character                  
##                                                        
##                                                        
##                                                        
##  attributes.RestaurantsGoodForGroups attributes.DriveThru attributes.NoiseLevel
##  Length:150346                       Length:150346        Length:150346        
##  Class :character                    Class :character     Class :character     
##  Mode  :character                    Mode  :character     Mode  :character     
##                                                                                
##                                                                                
##                                                                                
##  attributes.GoodForMeal attributes.BusinessAcceptsBitcoin attributes.Smoking
##  Length:150346          Length:150346                     Length:150346     
##  Class :character       Class :character                  Class :character  
##  Mode  :character       Mode  :character                  Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  attributes.Music   attributes.GoodForDancing attributes.AcceptsInsurance
##  Length:150346      Length:150346             Length:150346              
##  Class :character   Class :character          Class :character           
##  Mode  :character   Mode  :character          Mode  :character           
##                                                                          
##                                                                          
##                                                                          
##  attributes.BestNights attributes.BYOB    attributes.Corkage
##  Length:150346         Length:150346      Length:150346     
##  Class :character      Class :character   Class :character  
##  Mode  :character      Mode  :character   Mode  :character  
##                                                             
##                                                             
##                                                             
##  attributes.BYOBCorkage attributes.HairSpecializesIn attributes.Open24Hours
##  Length:150346          Length:150346                Length:150346         
##  Class :character       Class :character             Class :character      
##  Mode  :character       Mode  :character             Mode  :character      
##                                                                            
##                                                                            
##                                                                            
##  attributes.RestaurantsCounterService attributes.AgesAllowed
##  Length:150346                        Length:150346         
##  Class :character                     Class :character      
##  Mode  :character                     Mode  :character      
##                                                             
##                                                             
##                                                             
##  attributes.DietaryRestrictions hours.Monday       hours.Tuesday     
##  Length:150346                  Length:150346      Length:150346     
##  Class :character               Class :character   Class :character  
##  Mode  :character               Mode  :character   Mode  :character  
##                                                                      
##                                                                      
##                                                                      
##  hours.Wednesday    hours.Thursday     hours.Friday       hours.Saturday    
##  Length:150346      Length:150346      Length:150346      Length:150346     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  hours.Sunday      
##  Length:150346     
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
# Keep only the first twelve columns of the business table, then display it.
business <- business[seq_len(12)]
business
##    business_id            name
##    <chr>                  <chr>
## 1  Pns2l4eNsfO8kk83dixA6A Abby Rappoport, LAC, CMQ
## 2  mpf3x-BjTdTEA3yCZrAYPw The UPS Store
## 3  tUFrWirKiKi_TAnsVWINQQ Target
## 4  MTSW4McQd7CbVtyjqoe9mw St Honore Pastries
## 5  mWMc6_wTdE0EUBKIGXDVfA Perkiomen Valley Brewery
## 6  CF33F8-E6oudUQ46HnavjQ Sonic Drive-In
## 7  n_0UpQx1hsNbnPUSlodU8w Famous Footwear
## 8  qkRM_2X51Yqxk3btlwAQIg Temple Beth-El
## 9  k0hlBqXX-Bt0vf1op7Jr1w Tsevi's Pub And Grill
## 10 bBDDEgkFA1Otx9Lfe7BZUQ Sonic Drive-In
# Wide table of the businesses with one count column per category
# ("categories_<name>", 0 where the category is absent): split the
# ", "-separated category string, expand to one row per category, then
# spread the categories into columns.
business_wide <- business %>%
  mutate(categories = strsplit(categories, ", ")) %>%
  unnest(categories) %>%
  arrange(categories) %>%
  pivot_wider(
    values_from = categories,
    values_fn = length,
    values_fill = 0,
    names_from = categories,
    names_prefix = "categories_",
    names_repair = "universal"
  )
# Bar chart of the ten most common business categories.
#
# The category field is delimited by ", " (the same delimiter the strsplit()
# call that builds `business_wide` uses). Splitting on ";" never matches, so
# whole category strings were being counted instead of individual categories.
categories <- str_split(business$categories, ", ")
categories <- as.data.frame(unlist(categories))
colnames(categories) <- c("Name")

categories %>%
  # Businesses with no category information are not a category of their own.
  filter(!is.na(Name)) %>%
  group_by(Name) %>%
  summarise(Count = n()) %>%
  ungroup() %>%
  arrange(desc(Count)) %>%
  head(10) %>%
  # Order the bars by frequency (largest on top after coord_flip()).
  mutate(Name = reorder(Name, Count)) %>%
  ggplot(aes(x = Name, y = Count)) +
  geom_col(colour = "white", fill = fillcolor) +
  labs(x = 'Name of Category', y = 'Count',
       title = 'Top 10 Categories of Business') +
  coord_flip() +
  theme_classic()

# Businesses whose category string contains "Restaurant", and how many there
# are (rows x columns).
restaurants <- subset(business, grepl('Restaurant', categories))
dim(restaurants)
## [1] 52286    12
# Category strings of the businesses that are restaurants
filter(
  select(business, categories),
  str_detect(categories, "Restaurant")
)
## categories
## <chr>
## Restaurants, Food, Bubble Tea, Coffee & Tea, Bakeries
## Burgers, Fast Food, Sandwiches, Food, Ice Cream & Frozen Yogurt, Restaurants
## Pubs, Restaurants, Italian, Bars, American (Traditional), Nightlife, Greek
## Ice Cream & Frozen Yogurt, Fast Food, Burgers, Restaurants, Food
## Vietnamese, Food, Restaurants, Food Trucks
## American (Traditional), Restaurants, Diners, Breakfast & Brunch
## Food, Delis, Italian, Bakeries, Restaurants
## Sushi Bars, Restaurants, Japanese
## Korean, Restaurants
## Coffee & Tea, Food, Cafes, Bars, Wine Bars, Restaurants, Nightlife
# Ten states with the highest median business star rating.
# `stars` is already a numeric column of `business`, and type_convert() only
# re-parses character columns, so no conversion step is needed before
# aggregating.
business %>%
  select(state, stars) %>%
  group_by(state) %>%
  summarize(Stars = median(stars)) %>%
  arrange(desc(Stars)) %>%
  head(10)
## state Stars
## <chr> <dbl>
## MT     5.00
## SD     4.50
## UT     4.50
## VT     4.50
## HI     4.25
## CA     4.00
## CO     4.00
## FL     4.00
## ID     4.00
## IN     4.00
# Find the five businesses with the most reviews in the loaded review data
# and attach their business details.
maxreviews <- review %>%
  group_by(business_id) %>%
  summarise(Count = n()) %>%
  arrange(desc(Count)) %>%
  head(5) %>%
  # Name the key explicitly: a natural join silently keys on every column name
  # the two tables share, which is easy to get wrong as the tables change.
  inner_join(business, by = "business_id")
## Joining with `by = join_by(business_id)`
# Display the five most-reviewed businesses together with their details.
maxreviews
## business_id            Count name                            address               city
## <chr>                  <int> <chr>                           <chr>                 <chr>
## GBTPC53ZrG1ZBY3DT8Mbcw  2609 Luke                            333 Saint Charles Ave New Orleans
## PY9GRfzr4nTZeINf346QOw  1272 Peppermill Reno                 2707 S Virginia St    Reno
## W4ZEKkva9HpAdZG88juwyQ  1182 Mr. B's Bistro                  201 Royal St          New Orleans
## vN6v8m4DO45Z4pp8yxxF_w  1177 Surrey's Café & Juice Bar       1418 Magazine St      New Orleans
## SZU9c8V2GuREDN5KgyHFJw  1067 Santa Barbara Shellfish Company 230 Stearns Wharf     Santa Barbara
# Show the five establishments with the largest number of 5-star reviews,
# together with their business details and category columns.
review %>%
  filter(stars == 5) %>%
  group_by(business_id) %>%
  summarise(Count = n()) %>%
  arrange(desc(Count)) %>%
  head(5) %>%
  # Name the key explicitly: a natural join silently keys on every column name
  # the two tables share, which is easy to get wrong as the tables change.
  inner_join(business_wide, by = "business_id")
## Joining with `by = join_by(business_id)`
## business_id            Count name                         address               city        state
## <chr>                  <int> <chr>                        <chr>                 <chr>       <chr>
## GBTPC53ZrG1ZBY3DT8Mbcw  1254 Luke                         333 Saint Charles Ave New Orleans LA
## vN6v8m4DO45Z4pp8yxxF_w   706 Surrey's Café & Juice Bar    1418 Magazine St      New Orleans LA
## 8uF-bhJFgT4Tn6DTb27viA   653 District Donuts Sliders Brew 2209 Magazine St      New Orleans LA
## W4ZEKkva9HpAdZG88juwyQ   625 Mr. B's Bistro               201 Royal St          New Orleans LA
## UCMSWPqzXjd7QHq7v8PJjQ   608 Prep & Pastry                2660 N Campbell Ave   Tucson      AZ
# Collect every loaded review of the business named "Luke".
lukebusiness <- business %>%
  filter(name == "Luke")
# Join on business_id only. Both tables carry a `stars` column (one value per
# business vs. one value per review); a natural join keys on it as well and
# keeps only the reviews whose rating happens to equal the business-level
# value. The business-level column is renamed so that `stars` in the joined
# table is still the rating of the individual review.
lukeJoined_tbl <- lukebusiness %>%
  rename(business_stars = stars) %>%
  inner_join(review, by = "business_id") %>%
  as_tibble()
## Joining with `by = join_by(business_id, stars)`
# Add calendar fields (date, month, day, year, hour) derived from each
# review's `date` value.
lukeDateFormatted <- mutate(
  lukeJoined_tbl,
  date_formatted  = as_date(date),
  month_formatted = month(date),
  day_formatted   = day(date),
  year_formatted  = year(date),
  hour_formatted  = hour(date)
)
# Animated line chart of the number of Luke reviews per year.
lukeDateFormatted %>%
  select(year_formatted) %>%
  group_by(year_formatted) %>%
  summarise(NumberofReviews = n()) %>%
  ggplot(aes(x = year_formatted, y = NumberofReviews)) +
  xlab("Year") +
  ylab("Review Count") +
  ggtitle("Yearly review count for Luke") +
  # The line colour is a fixed value, so it is set on the geom rather than
  # mapped in aes(); mapped, the hex string becomes a one-level variable drawn
  # in the default palette with a meaningless legend.
  geom_line(color = fillcolor) +
  scale_x_continuous(breaks = seq(2008, 2018, 1),
                     labels = seq(2008, 2018, 1)) +
  theme_classic() +
  transition_reveal(year_formatted)
## `geom_line()`: Each group consists of only one observation.
## i Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## i Do you need to adjust the group aesthetic?

# No animation is passed, so anim_save() writes the most recent animation.
anim_save("lukenoreviewsyear.gif")
# Draw a word cloud of the 30 most frequent words in a set of reviews.
#
# x: data frame with a character column `text` holding the review text.
#
# Stop words and three words that carry no information for this restaurant
# ("luke", "restaurant", "menu") are dropped before counting.
# Called for its plotting side effect.
createWordCloud <- function(x) {
  ignored_words <- c(stop_words$word, "luke", "restaurant", "menu")

  x %>%
    unnest_tokens(word, text) %>%
    filter(!word %in% ignored_words) %>%
    count(word, sort = TRUE) %>%
    head(30) %>%
    with(wordcloud(word, n, max.words = 30, colors = brewer.pal(8, "Dark2")))
}

# Pass the Luke reviews explicitly: the function used to ignore its argument
# and always read this table from the global environment.
createWordCloud(lukeDateFormatted)

# Bar chart of the 20 words contributing most (positively or negatively) to
# the sentiment of a set of reviews, scored with the AFINN lexicon.
#
# SC: data frame with a character column `text` holding the review text.
#
# Returns the ggplot object; bars are filled by the sign of the contribution.
positiveWordsBarGraph <- function(SC) {
  contributions <- SC %>%
    unnest_tokens(word, text) %>%
    count(word, sort = TRUE) %>%
    inner_join(get_sentiments("afinn"), by = "word") %>%
    group_by(word) %>%
    # After count() every word is a single row, so the contribution has to be
    # weighted by the word's frequency: sum(value) alone is just the lexicon
    # score. `contribution` is computed first because `n` is redefined next.
    summarize(contribution = sum(n * value), n = sum(n))

  contributions %>%
    slice_max(abs(contribution), n = 20, with_ties = FALSE) %>%
    mutate(word = reorder(word, contribution)) %>%
    ggplot(aes(word, contribution, fill = contribution > 0)) +
    geom_col(show.legend = FALSE) +
    coord_flip() +
    theme_bw()
}
positiveWordsBarGraph(lukeJoined_tbl)